{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-17T09:17:53.051557Z",
     "start_time": "2020-12-17T09:17:52.751388Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "data = pd.read_csv('tmdb_5000_credits.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-17T09:17:53.094068Z",
     "start_time": "2020-12-17T09:17:53.086815Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 4803 entries, 0 to 4802\n",
      "Data columns (total 4 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   movie_id  4803 non-null   int64 \n",
      " 1   title     4803 non-null   object\n",
      " 2   cast      4803 non-null   object\n",
      " 3   crew      4803 non-null   object\n",
      "dtypes: int64(1), object(3)\n",
      "memory usage: 150.2+ KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-17T09:17:53.184077Z",
     "start_time": "2020-12-17T09:17:53.180788Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4803, 4)"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-17T09:17:53.496677Z",
     "start_time": "2020-12-17T09:17:53.492795Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "movie_id     int64\n",
       "title       object\n",
       "cast        object\n",
       "crew        object\n",
       "dtype: object"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-17T09:17:53.757130Z",
     "start_time": "2020-12-17T09:17:53.750758Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>title</th>\n",
       "      <th>cast</th>\n",
       "      <th>crew</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19995</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>285</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>206647</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n",
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>49026</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>49529</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n",
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movie_id                                     title  \\\n",
       "0     19995                                    Avatar   \n",
       "1       285  Pirates of the Caribbean: At World's End   \n",
       "2    206647                                   Spectre   \n",
       "3     49026                     The Dark Knight Rises   \n",
       "4     49529                               John Carter   \n",
       "\n",
       "                                                cast  \\\n",
       "0  [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...   \n",
       "1  [{\"cast_id\": 4, \"character\": \"Captain Jack Spa...   \n",
       "2  [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...   \n",
       "3  [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...   \n",
       "4  [{\"cast_id\": 5, \"character\": \"John Carter\", \"c...   \n",
       "\n",
       "                                                crew  \n",
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  "
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-17T09:10:51.199220Z",
     "start_time": "2020-12-17T09:10:51.190298Z"
    }
   },
   "source": [
    "# 无数据缺失\n",
    "\n",
    "# 数据类型转换\n",
    "\n",
    "## json\n",
    "\n",
    "- cast\n",
    "- crew"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-17T09:17:57.924665Z",
     "start_time": "2020-12-17T09:17:54.821957Z"
    }
   },
   "outputs": [],
   "source": [
    "data['cast'] = data['cast'].map(eval)\n",
    "data['crew'] = data['crew'].map(eval)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-17T09:21:34.216047Z",
     "start_time": "2020-12-17T09:21:34.175687Z"
    }
   },
   "source": [
    "# 保存数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-17T10:14:39.200836Z",
     "start_time": "2020-12-17T10:14:38.343830Z"
    }
   },
   "outputs": [],
   "source": [
    "data.to_csv('./after_process_tmdb_5000_credits.csv',index=False,encoding = 'utf-8')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 完成"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1rc1"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
